﻿// encodeconv.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include "Encode.h"
#include <fstream>
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <Windows.h>
#include <fstream>
#include <string>

/// Command line help text: author note, the two invocation forms
/// (single file with -f, directory with -p), the meaning of each switch
/// and a few example command lines.
std::wstring usage(
	L"Wrote by youlanhai,github,Modified by liuzhaoyzz,2019.08.23\n"
	L"encodeconv -f fname -s srcEncode [-d][DestEncode] [-o][output file] [-nb]\n"
	L"encodeconv -p path -e extensions -s src encode [-d][DestEncode] [-o][output path] [-r] [-nb]\n\n"

	// Switch reference.
	L"-e: extension of target files. eg. txt;h;cpp\n"
	L"-s: the source file encode. eg. auto,gbk,big5,utf-7,utf-8,ucs2,ucs-2le,ucs-2be.\n"
	L"-d: the dest file encode. default is utf-8\n"
	L"-o: the output file or path. default is same as the input file or path.\n"
	L"-r: convert rescursively.\n"
	L"-nb: don't add bom.\n\n"

	// Examples.
	L"eg. convert a file: encodeconv -f test.txt -s auto -d utf-8 -o test2.txt\n"
	L"eg. convert a file: encodeconv -f test.txt -s gbk -d utf-8 -o test2.txt\n"
	L"eg. convert files in path: encodeconv -p c:/test/ -e txt;h;cpp -s gbk -d utf-8 -o d:/test/ \n"
);


/// Finds the position of a switch on the command line.
///
/// @param cmd  switch text to look for (compared exactly, case-sensitive).
/// @param argc number of entries in argv.
/// @param argv command line arguments; argv[0] is never examined.
/// @return index of the first argument equal to cmd, or -1 if there is none.
int findArgN(const wchar_t *cmd, int argc, wchar_t* argv[])
{
	int pos = 1;	// start after argv[0]
	while(pos < argc)
	{
		const bool matches = (wcscmp(cmd, argv[pos]) == 0);
		if(matches)
		{
			return pos;
		}
		++pos;
	}
	return -1;
}

/// Finds the position of the value that follows a switch.
///
/// @param cmd  switch text to look for.
/// @param argc number of entries in argv.
/// @param argv command line arguments.
/// @return index of the argument right after cmd, or -1 when cmd is absent
///         or is the last argument (so no value follows it).
int findArgV(const wchar_t *cmd, int argc, wchar_t* argv[])
{
	const int namePos = findArgN(cmd, argc, argv);
	if(namePos < 0)
	{
		return -1;
	}

	const int valuePos = namePos + 1;
	return (valuePos < argc) ? valuePos : -1;
}

/// Heuristically decides whether a buffer without BOM holds UTF-8 text.
///
/// The buffer is scanned byte by byte: every lead byte (>= 0xC0) must be
/// followed by exactly the number of continuation bytes (bit pattern
/// 10xxxxxx) that its value announces. Besides the 2- to 4-byte forms, the
/// legacy 5-byte (lead 0xF8-0xFB) and 6-byte (lead 0xFC-0xFD) forms are
/// accepted. 0xFE and 0xFF can never occur in UTF-8 and are rejected.
///
/// @param pBuffer bytes to inspect; only dereferenced when length > 0.
/// @param length  number of bytes in pBuffer (non-positive means "empty").
/// @return TRUE  if every multi-byte sequence is well formed and complete
///               and the buffer contains at least one non-ASCII byte;
///         FALSE if a sequence is malformed or truncated, or if the buffer
///               is empty or pure ASCII (nothing to tell it from ANSI).
BOOL CheckUnicodeWithoutBOM(const PBYTE pBuffer, long length)
{
	DWORD nPending = 0;		// continuation bytes still expected
	BOOL bAllAscii = TRUE;

	// The index has the same type as length, so the comparison is never
	// made between differently sized integers.
	for (long i = 0; i < length; i++)
	{
		const UCHAR chr = pBuffer[i];

		if (chr < 0x80)
		{
			// Plain ASCII is fine between sequences, but not inside one.
			if (nPending != 0)
				return FALSE;
			continue;
		}
		bAllAscii = FALSE;

		if (nPending != 0)
		{
			// Inside a sequence only 10xxxxxx bytes are allowed.
			if ((chr & 0xC0) != 0x80)
				return FALSE;
			nPending--;
			continue;
		}

		// Start of a new sequence: derive the number of trailing bytes.
		if (chr >= 0xFE)
			return FALSE;		// 0xFE / 0xFF are never valid in UTF-8
		else if (chr >= 0xFC)
			nPending = 5;
		else if (chr >= 0xF8)
			nPending = 4;
		else if (chr >= 0xF0)
			nPending = 3;
		else if (chr >= 0xE0)
			nPending = 2;
		else if (chr >= 0xC0)
			nPending = 1;
		else
			return FALSE;		// stray continuation byte (0x80-0xBF)
	}

	// The buffer must not end in the middle of a sequence.
	if (nPending > 0)
		return FALSE;

	// Pure ASCII (or empty) input gives no evidence for UTF-8.
	if (bAllAscii)
		return FALSE;

	return TRUE;
}

/// Guesses the encoding of a byte buffer.
///
/// A byte order mark at the start of the buffer decides first; without one
/// the buffer is tested with CheckUnicodeWithoutBOM(), and anything that
/// does not pass is reported as "gbk".
///
/// The BOM bytes are only read when length says they exist, so the function
/// does not depend on the caller padding short buffers.
///
/// @param pBuffer bytes to inspect; may be NULL when length is 0.
/// @param length  number of valid bytes in pBuffer.
/// @return one of L"ucs-2le", L"ucs-2be", L"utf-8" or L"gbk".
std::wstring DetectEncode(const PBYTE pBuffer, long length)
{
	if (pBuffer != NULL && length >= 2)
	{
		if (pBuffer[0] == 0xFF && pBuffer[1] == 0xFE)
		{
			return L"ucs-2le";	// Unicode little endian
		}
		if (pBuffer[0] == 0xFE && pBuffer[1] == 0xFF)
		{
			return L"ucs-2be";	// Unicode big endian
		}
		if (length >= 3 && pBuffer[0] == 0xEF && pBuffer[1] == 0xBB && pBuffer[2] == 0xBF)
		{
			return L"utf-8";	// utf-8 with BOM
		}
	}

	if (CheckUnicodeWithoutBOM(pBuffer, length))
	{
		return L"utf-8";		// utf-8 without BOM
	}

	return L"gbk";				// ANSI fallback
}

std::wstring srcEncode;//定义全局变量srcEncode原文件编码格式
int wmain(int argc, wchar_t* argv[])
{
	if(argc < 3)
	{
		std::wcout<< usage <<std::endl;
		return 0;
	}

	std::wstring filter;

	bool isForFile;
	if(wcscmp(L"-f", argv[1]) == 0)
	{
		isForFile = true;
	}
	else if(wcscmp(L"-p", argv[1]) == 0)
	{
		isForFile = false;
	}
	else
	{
		std::wcout<< usage <<std::endl;
		return 0;
	}

	std::wstring srcName = argv[2];
	std::wstring destName;

	//find output file name.
	int index = findArgV(L"-o", argc, argv);
	if(index >= 0) destName = argv[index];
	else destName = srcName;

	//find src file encode.
	//std::wstring srcEncode;
	index = findArgV(L"-s", argc, argv);
	if(index <0)
	{
		std::wcout << "please input the source encoding with '-s srcEncode'!" << std::endl;
		return 0;
	} else {
		srcEncode = argv[index];
		//std::wcout<<srcEncode<<std::endl;
		if(wcscmp(argv[index], L"auto") == 0) 
		{
			//修改成自动检测源文件编码格式：
			LPCWSTR szFileName;
			szFileName=argv[2];
			//LPCWSTR szFileName=L"test.txt";//文件名字可以根据自己的需要修改，或者用变量输入
			HANDLE hFile = CreateFile(szFileName, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, NULL,
				OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
			//  共享方式打开，避免其他地方需要读写此文件
			if (INVALID_HANDLE_VALUE != hFile)
			{
				DWORD dwFileSize = GetFileSize(hFile, NULL);
				PBYTE pBuffer = (PBYTE)malloc(dwFileSize + 2);
				int iLen = 0;
				if (!ReadFile(hFile, pBuffer, dwFileSize, &dwFileSize, NULL))
				{
					free(pBuffer);
					return FALSE;
				}
				//CloseHandle(hFile);
				pBuffer[dwFileSize] = '\0';
				pBuffer[dwFileSize + 1] = '\0';
				//std::wstring srcEncode;
				srcEncode = DetectEncode(pBuffer, dwFileSize);
				CloseHandle(hFile);
				free(pBuffer);
			}

		}
		std::wcout<<"srcEncode:"<<srcEncode<<std::endl;
	}

	//find dest file encode.
	std::wstring destEncode = L"utf-8";
	index = findArgV(L"-d", argc, argv);
	if(index >= 0)
	{
		destEncode = argv[index];
	}

	bool rescursively = false;
	if(findArgN(L"-r", argc, argv) >= 0)
	{
		rescursively = true;
	}

	bool useBom = true;
	if(findArgN(L"-nb", argc, argv) >= 0)
	{
		useBom = false;
	}

	size_t result;
	if(isForFile)
	{

#ifdef LAZY_DEBUG_MSG
		std::wcout<<L"convertFile: "
			<<destName <<L", "
			<<destEncode <<L", "
			<<srcName <<L", "
			<<srcEncode <<L", "
			<<std::endl;
#endif

		result = Lazy::convertFile(destName, destEncode, srcName, srcEncode, useBom);

	}
	else
	{
		//parse the target files extension
		index = findArgV(L"-e", argc, argv);
		if(index < 0)
		{
			std::wcout << "please input extensions of the taget files with '-e extesions'." << std::endl;
			return 0;
		}
		filter = argv[index];

#ifdef LAZY_DEBUG_MSG
		std::wcout<<L"convertPath: "
			<<destName <<L", "
			<<destEncode <<L", "
			<<srcName <<L", "
			<<srcEncode <<L", "
			<<filter <<L", "
			<<rescursively <<L", "
			<<std::endl;
#endif

		result = Lazy::convertPath(destName, destEncode, srcName, srcEncode, useBom, filter, rescursively);
	}

	if(result == Lazy::Error::ok)
	{
		std::wcout<<L"info: convert '"<<srcName
			<<"' to '"<< destName <<"' success." <<std::endl;
	}
	else
	{
		std::wcout<<L"error: convert '"<<srcName
			<<"' to '"<< destName <<"' failed! code:" <<result <<std::endl;
	}


	return 0;
}

